import requests
import os
import json
import re
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every HTTP request made by this script.
_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; WOW64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/64.0.3282.186 Safari/537.36'
)
headers = {'User-Agent': _USER_AGENT}

# Endpoint that receives the image upload for image search.
upload_url = 'https://s.taobao.com/image'

# Image to upload: file name reported in the form, and its path on disk.
imgname = 'apple.jpeg'
imgpath = './tmp/' + imgname

def tryRequest():
    """Upload the configured image to Taobao image search and parse the results.

    Steps:
      1. POST the image at ``imgpath`` to ``upload_url`` as a multipart form.
      2. Read the uploaded image's ``name`` from the JSON in the response.
      3. GET the search page for that name and pass its HTML to ``htmlParse``.

    Any exception raised along the way is caught and printed (best effort);
    nothing is returned.
    """
    try:
        # Use a context manager so the image file handle is always closed,
        # even when the upload fails.
        with open(imgpath, 'rb') as img:
            files = {
                'imgfile': (imgname, img, 'image/jpeg'),
                'cross': (None, 'taobao'),
                'type': (None, 'iframe'),
            }
            # A timeout keeps the script from hanging forever on a stalled
            # connection; a timeout error is reported by the handler below.
            r = requests.post(upload_url, headers=headers, files=files,
                              timeout=30)
        tmp = r.text
        print('tmp = ', tmp)
        # Example response:
        # <script>document.domain="taobao.com";</script>{"status":1,"name":"TB1usO8aoOWBKNjSZKzXXXfWFXa","url":"//g-search1.alicdn.com/img/bao/uploaded/i4/TB1usO8aoOWBKNjSZKzXXXfWFXa","error":false}
        # The JSON payload is whatever follows the last closing script tag.
        # rpartition returns the whole text when no such tag is present.
        payload = tmp.rpartition('</script>')[2]
        name = json.loads(payload)['name']

        # Build the GET request for the search page; the uploaded image's
        # name goes into the ``tfsid`` query parameter.
        search_url = (
            'https://s.taobao.com/search?q=&imgfile=&commend=all&ssid=s5-e'
            '&search_type=item&sourceId=tb.index'
            '&spm=a21bo.2017.201856-taobao-item.2'
            '&ie=utf8&initiative_id=tbindexz_20170306'
            '&tfsid=' + name + '&app=imgsearch'
        )

        r2 = requests.get(search_url, headers=headers, timeout=30)
        htmlParse(r2.text)
    except Exception as e:
        # Best-effort script: report the failure instead of crashing.
        print('Exception e', e)

def htmlParse(h):
    """Extract the raw "itemlist" fragment from a search result page.

    The largest inline ``<script>`` element (by text length) is selected and
    searched for the text between ``"itemlist":`` and ``,"sharebar"``.

    Prints the size of the selected script, then either the length of the
    extracted fragment or a short notice when nothing was found.

    Args:
        h: HTML text of the search result page, or None.

    Returns:
        The captured fragment as a string, or None when ``h`` is None or no
        matching fragment exists in the page.
    """
    if h is None:
        return None
    soup = BeautifulSoup(h, 'html.parser')

    # Candidates are scripts with non-empty inline text; ``.string`` can be
    # None, and those tags are skipped.
    inline_scripts = [tag for tag in soup.find_all('script') if tag.string]
    # max() keeps the first of equally sized scripts, like a strict ">" scan.
    biggest = max(inline_scripts, key=lambda tag: len(tag.string),
                  default=None)
    biggest_size = len(biggest.string) if biggest is not None else 0
    print(biggest_size)

    # Pull out the content of  "itemlist": { ... },"sharebar":  via regex.
    script_text = str(biggest) if biggest is not None else ''
    match = re.search(r'"itemlist":(.*?),"sharebar"', script_text)
    if match is None:
        # Nothing to extract (no script or unexpected page layout); report it
        # and return None rather than failing with an IndexError.
        print('itemlist not found')
        return None

    fragment = match.group(1)
    print(len(fragment))
    return fragment

# Run the upload-and-search flow once. There is no __main__ guard, so this
# also executes when the module is imported.
tryRequest()